library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.3
## Warning: package 'tidyr' was built under R version 3.6.3
## Warning: package 'purrr' was built under R version 3.6.3
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(gt)
## Warning: package 'gt' was built under R version 3.6.3
data("flights", package = "pnwflights14")
library(DT)
## Warning: package 'DT' was built under R version 3.6.3
flight_PDX<-flights%>%
  filter(origin=="PDX")
datatable(flight_PDX,rownames = FALSE)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
library(tidyverse)
track_record_PDX<-flight_PDX%>%
  group_by(month,carrier)%>%
  summarize(Avg_dep_delay=mean(dep_delay,na.rm = TRUE))%>%
  arrange(month,Avg_dep_delay)
datatable(track_record_PDX,rownames = FALSE)

Which airlines had the best and worst track records of on-time departures in each month? Is it different between PDX and SEA?

Average_track_PDX<-track_record_PDX%>%
    group_by(month) %>% 
    slice(which.min(Avg_dep_delay),which.max(Avg_dep_delay))
Average_track_PDX%>%gt(groupname_col = "month") %>%
  tab_header(title="PDX-Best and worst carrier in depature delay each month")%>%
  cols_label(month="Month",carrier="Carrier",Avg_dep_delay="Average dep delay")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
PDX-Best and worst carrier in depature delay each month
Carrier Average dep delay
1
HA -2.451613
F9 21.100000
2
HA -2.178571
VX 28.631579
3
HA -2.967742
AA 24.852459
4
HA -6.133333
AA 16.738889
5
AS -2.354346
WN 11.215622
6
HA -3.833333
F9 18.113636
7
HA -2.483871
WN 13.844753
8
HA -4.322581
AA 9.504673
9
VX -2.333333
HA 15.366667
10
VX -5.129032
AA 13.701754
11
HA 1.200000
WN 8.688581
12
HA -1.354839
AA 19.365854

The above table shows the best and worst airline departure delay every month.It seems in month 1st,best is HA whereas worst is F9, similarly in 2nd month, best is HA and worst is VX

flight_SEA<-flights%>%
  filter(origin=="SEA")
datatable(flight_SEA)
## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html
track_record_SEA<-flight_SEA%>%
  group_by(month,carrier)%>%
  summarize(Avg_dep_delay=mean(dep_delay,na.rm = TRUE))%>%
  arrange(month,Avg_dep_delay)
datatable(track_record_SEA,rownames = FALSE)
Average_track_SEA<-track_record_SEA%>%
    group_by(month) %>% 
    slice(which.min(Avg_dep_delay),which.max(Avg_dep_delay))
Average_track_SEA%>%gt() %>%
  tab_header(title="SEA-Best and worst carrier in depature delay each month")%>%
  cols_label(month="Month",carrier="Carrier",Avg_dep_delay="Average dep delay")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
SEA-Best and worst carrier in depature delay each month
Carrier Average dep delay
1
AS 1.85991262
F9 22.57843137
2
HA 0.33928571
F9 24.66666667
3
HA 2.19354839
WN 11.42794279
4
US 0.72830189
F9 12.56310680
5
HA -0.12903226
WN 12.46680080
6
HA -3.15000000
WN 21.21019608
7
HA 0.80645161
F9 21.71014493
8
OO 3.40660737
HA 19.63934426
9
US 0.03010033
WN 9.41641939
10
HA -2.12903226
AA 18.92592593
11
US 2.33333333
UA 11.55710660
12
US 2.43824701
VX 31.45374449

The best and worst track records of on-time departures of airlines in each month is different for PDX and SEA. For each city Portland and Seattle, it seems every month the position of best and worst airlines are getting changed, showing no consistency.

What cities have the most service from Portland (defined however you like, but do make sure to define it clearly!), and which have the worst?

most_service<-flight_PDX%>%
  group_by(dest)%>%
  summarise(flight=n())%>%
  arrange(desc(flight))
most_service%>%gt() %>%
  tab_header(title="PDX-Number of flights in each destination")%>%
  cols_label(dest="Destination",flight="Number of flights")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(align = "right"), 
    locations = cells_title(groups = c("subtitle"))
  ) %>% 
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
PDX-Number of flights in each destination
Destination Number of flights
SFO 5179
DEN 3940
PHX 3570
LAX 3001
ORD 2501
LAS 2482
SLC 2465
SJC 2368
SEA 2289
OAK 1944
DFW 1884
SMF 1818
ATL 1558
SAN 1550
MSP 1362
BUR 1027
SNA 1023
LGB 1008
ANC 1000
IAH 998
ONT 925
JFK 854
RDM 731
HNL 730
MDW 676
EUG 636
OGG 605
EWR 510
BOS 504
IAD 371
DCA 365
MCI 365
SBA 365
TUS 365
DTW 310
ABQ 299
CLT 277
PSP 271
PHL 180
KOA 168
RNO 164
LMT 154
LIH 103
BOI 98
FAI 93
BWI 86
AUS 63
HOU 63
STL 37
max(most_service$flight)
## [1] 5179

From the above table, it shows SFO or San Fransisco has highest number of services from portland.STL has the lowest or worst number of services from PORTLAND.

Which airlines improved the most in terms of on-time departures over time, and on which routes? Which airlines got worse?

xtabs(Avg_dep_delay~carrier+month,track_record_PDX)
##        month
## carrier           1           2           3           4           5           6
##      AA  8.37158470 14.98125000 24.85245902 16.73888889 10.91959799 17.12500000
##      AS -1.18441815  2.48504983 -0.33514493 -1.72155412 -2.35434575  0.30747664
##      B6  9.10843373  9.25352113 -1.37974684  5.91764706  4.89565217  1.40157480
##      DL  4.05121294  9.17571885  2.09026128 -0.51288056  1.04444444  2.96007984
##      F9 21.10000000 13.79166667  6.11403509  2.67889908  7.64444444 18.11363636
##      HA -2.45161290 -2.17857143 -2.96774194 -6.13333333 -1.64516129 -3.83333333
##      OO  5.80552712  7.90307868  3.21081577  1.26673327  2.33487085  3.99004975
##      UA  9.12328767 15.69393140  5.80470588  1.46421268  7.90820312  8.21190893
##      US  4.36416185  1.06711409 -1.70370370  1.74626866 -1.21363636  2.20689655
##      VX -1.81609195 28.63157895  4.70588235  4.08139535  4.30337079  7.68852459
##      WN 15.82699387 17.22971114 10.31091510 11.33922652 11.21562156 16.84363296
##        month
## carrier           7           8           9          10          11          12
##      AA  6.90521327  9.50467290  8.90640394 13.70175439  8.61600000 19.36585366
##      AS  1.49910072  2.36147757  0.92829457  0.02506964  2.16844920  7.36274510
##      B6 11.67955801  6.30000000  7.38947368  2.83516484  8.20000000  3.21590909
##      DL  3.66785080  2.73713235  2.21250000  0.24129353  2.80169972  2.25490196
##      F9  9.42028986  1.98540146  4.76422764 -0.81415929  5.16666667 14.18181818
##      HA -2.48387097 -4.32258065 15.36666667  1.35483871  1.20000000 -1.35483871
##      OO  4.18908629  3.66990291  2.09876543  2.62011173  7.50389105 10.82581967
##      UA  6.45563140  6.94627383  5.44503546  7.40105079  5.42600897 10.79600887
##      US  4.32644628  0.46521739  0.26288660 -0.55801105  3.10691824  4.55688623
##      VX  7.29032258  6.45161290 -2.33333333 -5.12903226  4.44827586  7.13333333
##      WN 13.84475282  9.33173996  6.93010753  6.78040904  8.68858131 18.12652608
first_six_month<-flight_PDX%>%
  filter(month<7)
first_six_month_tidy<-first_six_month%>%
  group_by(carrier)%>%
  summarize(jan_to_june=median(dep_delay,na.rm = TRUE))
first_six_month_tidy%>%gt() %>%
  tab_header(title="PDX-dep_delay average from Jan to June")%>%
  cols_label(carrier="Carrier",jan_to_june="Jan to June")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(align = "right"), 
    locations = cells_title(groups = c("subtitle"))
  ) %>% 
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
PDX-dep_delay average from Jan to June
Carrier Jan to June
AA -2
AS -5
B6 -4
DL -3
F9 -1
HA -6
OO -4
UA -2
US -4
VX -4
WN 2
last_six_month<-flight_PDX%>%
  filter(month>6)
last_six_month_tidy<-last_six_month%>%
  group_by(carrier)%>%
  summarize(july_to_dec=median(dep_delay,na.rm = TRUE))
last_six_month_tidy%>%gt() %>%
  tab_header(title="PDX-dep_delay average from July to Dec")%>%
  cols_label(carrier="Carrier",july_to_dec="July to Dec")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(align = "right"), 
    locations = cells_title(groups = c("subtitle"))
  ) %>% 
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
PDX-dep_delay average from July to Dec
Carrier July to Dec
AA -2
AS -4
B6 -3
DL -3
F9 -4
HA -4
OO -4
UA -1
US -3
VX -3
WN 1
improvement<-merge(first_six_month_tidy,last_six_month_tidy,by="carrier")
improvement_final<-improvement%>%
  mutate(improvement_factor=july_to_dec-jan_to_june)%>%
  arrange(improvement_factor)

improvement_final%>%gt() %>%
  tab_header(title="PDX-Airlines improvement in dep_delay average ")%>%
  cols_label(carrier="Carrier",jan_to_june="Jan to June",july_to_dec="July to Dec",improvement_factor="Improvement factor")%>%
  cols_align(align="center") %>%
  tab_style(
    cell_text(align = "right"), 
    locations = cells_title(groups = c("subtitle"))
  ) %>% 
  tab_style(
    cell_text(style = "italic"),
    locations = cells_title(groups=c("title"))
  )
PDX-Airlines improvement in dep_delay average
Carrier Jan to June July to Dec Improvement factor
F9 -1 -4 -3
WN 2 1 -1
AA -2 -2 0
DL -3 -3 0
OO -4 -4 0
AS -5 -4 1
B6 -4 -3 1
UA -2 -1 1
US -4 -3 1
VX -4 -3 1
HA -6 -4 2

From the above table it shows that F9 improved a lot whereas HA has worst delay rate.

Using kable() function:

library(knitr)
## Warning: package 'knitr' was built under R version 3.6.3
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 3.6.3
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
first_Six<-flight_PDX%>%
  filter((carrier=="F9" | carrier=="HA")& month<7 )%>%
  group_by(dest,carrier)%>%
  summarize(first_avg=median(dep_delay,na.rm = TRUE))
column1=c("Destination","Carrier","First 6 month average")
first_Six%>%
    kable(format = "html", digits = 2, caption = "First six month average departure delay",col.names = column1) %>%
  kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
First six month average departure delay
Destination Carrier First 6 month average
DEN F9 -1
HNL HA -6
STL F9 -1
last_Six<-flight_PDX%>%
  filter((carrier=="F9" | carrier=="HA")& month>6)%>%
  group_by(dest,carrier)%>%
  summarize(second_avg=median(dep_delay,na.rm = TRUE))
column2=c("Destination","Carrier","Last 6 month average")
last_Six%>%
  kable(format = "html", caption = "Last six month average departure delay",col.names = column2) %>% 
  kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
Last six month average departure delay
Destination Carrier Last 6 month average
DEN F9 -4
HNL HA -4
STL F9 -9

Finding out best and worst route for F9 and HA

final<-full_join(first_Six, last_Six, by = c("dest", "carrier"))%>%
  mutate(improvement_factor=second_avg-first_avg)

column=c("Destination","Carrier","First six month","Last 6 month","Improvement")
final%>% kable(format = "html", digits = 2, caption = "Improvement in average departure delay",col.names = column) %>% 
  kable_styling(bootstrap_options = "striped", full_width = F, position = "center")
Improvement in average departure delay
Destination Carrier First six month Last 6 month Improvement
DEN F9 -1 -4 -3
HNL HA -6 -4 2
STL F9 -1 -9 -8

The above data shows, carrier F9 improved alot over time for STL route and HA has worst improvement over months in HNL route.

In most of the table I used gt() and datatable() function. I liked the gt() function most. It has lots of parameters for customization as per the need. Also, I have seen how it grouped and represent the group_by data without mentioning it explicitly.I am not sure, but I think in datatable one can not add titles or renaming column headers. gt() function have all these additional customizations.I also used “kable” function and its also nice to represent table in paper publish format.I also, tried with kable() function. This one is also a good choice with lots of parameters to customize tables.I kept the alighment of the table as “centre”, which looks great to me.Also, in kable() function, I realise, the table header is in slightly lighter in shade. I am not very sure if it can be make dark.

FONTS

library(ggplot2)
library(extrafont)
## Warning: package 'extrafont' was built under R version 3.6.2
loadfonts(device = "win")
plot<-ggplot(improvement_final,aes(x=carrier,y=improvement_factor))+
  geom_col(aes(fill=improvement_factor))+
  theme(plot.title = element_text(hjust = 0.5)) +
  theme(axis.text = element_text(size = 15)) +
  labs(x = "Carrier", y = "Improvement in departure delay")+
  theme(panel.background = element_blank(),axis.line = element_line(colour = "black"))+
  theme(panel.border = element_rect(linetype = "dashed", fill = NA))+
  ggtitle(~""*underline("PDX-Airlines improvement in dep_delay average"))

Serif

library(extrafont)
loadfonts(device = "win")
library(showtext)
## Warning: package 'showtext' was built under R version 3.6.3
## Warning: package 'sysfonts' was built under R version 3.6.3
## Warning: package 'showtextdb' was built under R version 3.6.3
showtext_auto()
font_add_google("Tinos")
plot+theme(text=element_text(size=12,  family="Tinos",color = "brown"))

As per my findings above, the above plot also shows, F9 improved a lot over time whereas HA has worst average departure delay time.

San - Serif

font_add_google("Open Sans")
plot+theme(text=element_text(size=12,  family="Open Sans",color = "brown"))

Google Display font - lobster

showtext_opts(dpi = 72)
font_add_google("Lobster", "lobster")
plot+theme(text=element_text(size=22,  family="lobster",color = "brown"))

##plot+theme(text=(family="lobster",col = "steelblue", size = 3))

I have chosen 3 fonts for my plot texts. Serif , San serif and Display font.

Conclusion:

Tinos font within Serif categories caught my attention because it is very soothing and refreshing. Tinos font is quite similar to Times New Roman font and it looks more professional to me. This font is appropriate for axis titles in plot. I selected Open Sans within Sans Serif family. This font is very easy to read and has friendly appearance. Even though the Open Sans font was optimized for print, web, and mobile interfaces, I like it for professional graphs and presentations. Among display fronts, I selected Lobster font. I think Lobster font is good for certain display for informal presentation, but it is not good for plots. Choosing the right font depends on the purpose and the audience.